package com.shujia.spark.core

import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.rdd.RDD

/**
  * Demonstrates `reduceByKey` with a word count.
  *
  * Reads `data/words.txt`, splits every line on ",", turns each token into a
  * `(token, 1)` pair and sums the values that share a key.
  */
object Demo8ReduceByKey {
  /**
    * Entry point: builds a Spark context, runs the word count and prints
    * every `(word, count)` pair.
    *
    * @param args command-line arguments (not used)
    */
  def main(args: Array[String]): Unit = {

    val conf: SparkConf = new SparkConf()
      .setAppName("map")
      .setMaster("local")

    // Spark context object
    val sc = new SparkContext(conf)

    // Always release the context, even when the job fails (e.g. the input
    // file does not exist), so the Spark runtime is shut down cleanly.
    try {
      val linesRDD: RDD[String] = sc.textFile("data/words.txt")

      val wordsRDD: RDD[String] = linesRDD.flatMap(_.split(","))

      // Convert the RDD to key-value format
      val kvRDD: RDD[(String, Int)] = wordsRDD.map(word => (word, 1))

      /**
        * reduceByKey: aggregates the values that belong to the same key
        * using the supplied function.
        */
      val countRDD: RDD[(String, Int)] = kvRDD.reduceByKey((i: Int, j: Int) => i + j)

      // foreach is an action, so this line is what actually triggers the job.
      countRDD.foreach(println)

      // Shorthand: when each parameter is used exactly once, it can be
      // replaced by an underscore.
      // NOTE: this is only a transformation and no action is called on it,
      // so it is never executed; it is kept to illustrate the syntax.
      val count2: RDD[(String, Int)] = kvRDD.reduceByKey(_ + _)
    } finally {
      sc.stop()
    }

  }

}
